#Data from Bureau of Labor Statistics: Food and beverages in U.S. city average, all urban consumers, not seasonally adjusted. Downloaded all monthly data from 1967 forward from https://data.bls.gov/dataViewer/view/timeseries/CUUR0000SAF;jsessionid=40005007FB9F133245ECFC6162596EB0
#input the data, mutate the time column to yearmonth and convert to a tsibble
food_prices <- readr::read_csv("file.csv", show_col_types=FALSE) |>
mutate(Month=yearmonth(Label))|>
filter(year(Month)>1999)|>
dplyr::select(Month, Value) |>
as_tsibble(index=Month)
#forward-fill any missing values with zoo::na.locf
food_prices<-na.locf(food_prices)
#View tsibble
print(food_prices)
## # A tsibble: 302 x 2 [1M]
## Month Value
## <mth> <dbl>
## 1 2000 Jan 167.
## 2 2000 Feb 167.
## 3 2000 Mar 167.
## 4 2000 Apr 167.
## 5 2000 May 168.
## 6 2000 Jun 168.
## 7 2000 Jul 169.
## 8 2000 Aug 169.
## 9 2000 Sep 169.
## 10 2000 Oct 170.
## # ℹ 292 more rows
food_prices |>
model(
classical_decomposition(Value, type = "additive")
) |>
components() |>
autoplot() +
labs(title = "Classical additive decomposition of US Food & Beverage Prices")
## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_line()`).
food_prices|>
autoplot(Value) +
labs( y="US Dollar", title="US Food Prices")
food_prices|>
ACF(Value)|>
autoplot() +labs(title="US Food Prices ACF")
food_prices|>
gg_season(Value, labels="both")+
labs(y="US $", title="Seasonal plot: US Food and Beverage Prices")
food_prices |>
gg_subseries(Value)+
labs(y = "US$", title="US Food and Beverage Costs")
#import CPI data for US economy from BLS:
#https://fred.stlouisfed.org/series/CPIAUCNS
us_economy <- readr::read_csv("CPIAUCNS.csv", show_col_types=FALSE) |>
mutate(Month=yearmonth(observation_date))|>
filter(year(Month)>2000)|>
dplyr::select(CPIAUCNS, Month) |>
as_tsibble(index=Month)
print(us_economy)
## # A tsibble: 290 x 2 [1M]
## CPIAUCNS Month
## <dbl> <mth>
## 1 175. 2001 Jan
## 2 176. 2001 Feb
## 3 176. 2001 Mar
## 4 177. 2001 Apr
## 5 178. 2001 May
## 6 178 2001 Jun
## 7 178. 2001 Jul
## 8 178. 2001 Aug
## 9 178. 2001 Sep
## 10 178. 2001 Oct
## # ℹ 280 more rows
food_prices|>
left_join(us_economy, by="Month") |>
mutate(Adjusted_Value= Value/CPIAUCNS *100) |>
pivot_longer(c(Value, Adjusted_Value), values_to="Value")|>
mutate(name=factor(name,
levels=c("Value","Adjusted_Value"))) |>
ggplot(aes(x=Month, y=Value))+
geom_line()+
facet_grid(name ~.,scales="free_y")+
labs(title="US Food & Beverage Prices",y="US$")
### Seasonal Plot Inflation Adjusted
#create inflation-adjusted food price tsibble
food_prices_adj<-food_prices|>
left_join(us_economy, by="Month") |>
mutate(Adjusted_Value= Value/CPIAUCNS *100) |>
dplyr::select(Adjusted_Value,Month)
#forward-fill NAs
food_prices_adj<-na.locf(food_prices_adj)
#seasonal plot of adjusted food prices
food_prices_adj|>
gg_season(Adjusted_Value, labels="both")+
labs(y="US $", title="Seasonal plot: US Food and Beverage Prices")
### Autocorrelation Function Inflation Adjusted
food_prices_adj|>
ACF(Adjusted_Value)|>
autoplot() + labs(title="Inflation-Adjusted US Food Prices ACF")
food_prices_adj |>
gg_subseries(Adjusted_Value)+
labs(y = "US$", title="US Food and Beverage Costs")
food_prices_adj |>
model(
classical_decomposition(Adjusted_Value, type = "additive")
) |>
components() |>
autoplot() +
labs(title = "Classical additive decomposition of Inflation Adjusted US Food & Beverage Prices")
## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_line()`).
The US Food and Beverage dataset shows a clear upward trend in the cost of food and beverages from 2000 to the present. In the unadjusted data the trend appears strong and nearly constant; however, once the data are adjusted for inflation the trend is still upward but shows more cyclical movement.
While the decomposition plots of both the unadjusted and adjusted data appear to show substantial seasonality, the seasonal component is actually small relative to the trend. The seasonal subseries plot of the unadjusted data shows almost no variation in monthly costs, which is not what we would expect from a strongly seasonal series. Seasonality is more noticeable in the subseries plot of the inflation-adjusted data, where it becomes apparent that food costs are lower in the summer months and higher in the winter months.
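One way to quantify how small the seasonal component is, sketched below under the assumption that the feasts feature functions are available alongside the plotting helpers used above, is to compute STL-based strength measures: a seasonal_strength_year value near 0 indicates weak seasonality and a value near 1 strong seasonality.
#sketch: STL-based trend and seasonal strength measures (feasts::feat_stl); output not shown
food_prices |>
features(Value, feat_stl)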
#pulling the stock ticker for SP500 Food Products Index - Same time range as other dataset
getSymbols("^SP500-302020",
src="yahoo",
from = "2000-01-07",
to = "2025-01-31")
## Warning: ^SP500-302020 contains missing values. Some functions will not work if
## objects contain missing values in the middle of the series. Consider using
## na.omit(), na.approx(), na.fill(), etc to remove or replace them.
## [1] "SP500-302020"
# Get the closing prices as an xts object
stock_data <- Cl(`SP500-302020`)
# Create a complete time sequence including weekends
all_dates <- seq.Date(from = as.Date("2000-01-07"), to = as.Date("2025-01-31"), by = "day")
stock_data_full <- merge(stock_data, xts(, all_dates)) # Merge with full date sequence
#adjusting the column names
colnames(stock_data_full) <- c("Close")
# Check the result
head(stock_data_full)
## Close
## 2000-01-07 163.26
## 2000-01-08 NA
## 2000-01-09 NA
## 2000-01-10 160.20
## 2000-01-11 161.43
## 2000-01-12 160.47
# Forward fill the missing values
stock_data_filled <- na.locf(stock_data_full)
# View the data after filling missing values
head(stock_data_filled)
## Close
## 2000-01-07 163.26
## 2000-01-08 163.26
## 2000-01-09 163.26
## 2000-01-10 160.20
## 2000-01-11 161.43
## 2000-01-12 160.47
plot(stock_data_filled)
## Additive Decomposition Plot
SP500_ts <- ts(stock_data_filled,frequency=365)
plot(decompose(SP500_ts))
SP500_ts.decom <- decompose(SP500_ts, type = "mult")
plot(SP500_ts.decom)
Trend <- SP500_ts.decom$trend
Seasonal <- SP500_ts.decom$seasonal
ts.plot(cbind(Trend, Trend * Seasonal), lty=1:2)
acf(SP500_ts)
# Convert xts to tsibble
df <- data.frame(Date = zoo::index(stock_data_filled), Close = zoo::coredata(stock_data_filled))
tsibble_data <- as_tsibble(df, index = Date)
#plot the seasonality chart
tsibble_data %>%
gg_season(Close, labels = "both") +
labs(y = "Closing Price ($)",
title = "Seasonal plot: S&P500 Food Price Index")
#take a subset of the data for readability of chart
tsibble_data_subset <- tail(tsibble_data,100)
#create moving averages
tsibble_data_ma <- tsibble_data_subset |>
mutate(
`5-MA` = slider::slide_dbl(Close, mean,
.before = 2, .after = 2, .complete = TRUE),
`7-MA` = slider::slide_dbl(Close, mean,
.before = 3, .after = 3, .complete = TRUE)
)
#plot the data
tsibble_data_ma |>
autoplot(Close) +
geom_line(aes(y = `5-MA`), colour = "orange") +
geom_line(aes(y = `7-MA`), colour = "green") +
labs(y = "Closing Price ($)",
x = "Month",
title = "Closing Price for S&P500 Food Price Index")
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_line()`).
As we would expect from data on a stock index, there is a clear upward trend over the last 25 years. The same pattern holds for almost every sector of the S&P 500: prices fluctuate within shorter date ranges, but over a long horizon (20+ years) the trend is almost certainly increasing. Surprisingly, the decomposition shows some seasonality in the index, although it is much less pronounced in the seasonal plot (visualization 3). Some seasonality is visible in that a few years show downturns around July and most years rise the most in December. The dataset is certainly non-stationary: there is a clear increasing trend, so the mean changes over time (as seen in the moving-average chart, visualization 4).
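As a quick, hedged check of that non-stationarity claim (a sketch assuming the feasts unit-root features are available; output not shown), a KPSS test and the suggested number of first differences can be computed directly on the daily closes:
#sketch: KPSS test (null hypothesis: the series is stationary) and suggested number of differences
tsibble_data |> features(Close, unitroot_kpss)
tsibble_data |> features(Close, unitroot_ndiffs)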
We will convert both series to data frames and merge them into one data set. The stock data is daily and the food data is monthly, so we keep only the close on the last available day of each month before merging.
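As an aside, a more compact route to the same month-end aggregation is sketched below using tsibble's index_by(); the names stock_monthly and merged_alt are illustrative only, and this is not the approach used in the code that follows.
#sketch: take the last close in each calendar month, then join on the yearmonth index
stock_monthly <- tsibble_data |>
index_by(Month = yearmonth(Date)) |>
summarise(Close = dplyr::last(Close))
merged_alt <- food_prices_adj |>
left_join(as_tibble(stock_monthly), by = "Month")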
# Convert stock_data_filled from xts to a data frame
stock_data_filled_df <- data.frame(
Month = index(stock_data_filled),
Close = as.vector(stock_data_filled$Close)
)
stock_data_filled_df$Month <- as.Date(stock_data_filled_df$Month)
# Get last day of each month for stock data
stock_data_filled_df <- stock_data_filled_df %>%
group_by(YearMonth = format(Month, "%Y %b")) %>%
filter(Month == max(Month)) %>%
ungroup() %>%
dplyr::select(Month = YearMonth, Close)
# Convert food_prices_adj from tibble to a data frame
food_prices_adj_df <- as.data.frame(food_prices_adj)
food_prices_adj_df$Month <- as.character(food_prices_adj_df$Month)
# Merge
merged_df <- merge(stock_data_filled_df, food_prices_adj_df, by = "Month")
# Create a merged tibble
merged_df <- merged_df %>%
mutate(Month = yearmonth(Month)) %>%
as_tsibble(index = Month)
head(merged_df)
The merged_df now contains the month index and both variables of interest. We can use it to build our models. Below we plot the data to view the trends visually.
Adjusted_Plot <- ggplot(merged_df, aes(x = Month, y = Adjusted_Value)) +
geom_line(color = "blue") +
labs(x = "Month", y = "Adjusted Value", title = "Food Prices")
Close_Plot <- ggplot(merged_df, aes(x = Month, y = Close)) +
geom_line(color = "green") +
labs(x = "Month", y = "Close", title = "Stock Index")
grid.arrange(Adjusted_Plot, Close_Plot, ncol = 1)
We need to check whether the stock and food price series are stationary or non-stationary before building an ARIMA or ETS model.
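With type = "none", ur.df() fits a regression of the form shown below, here with the single AIC-selected lag that appears as z.diff ~ z.lag.1 - 1 + z.diff.lag in the output that follows:

$$\Delta y_t = \gamma\, y_{t-1} + \phi_1\, \Delta y_{t-1} + \varepsilon_t$$

The reported test statistic is the t-ratio on $\gamma$; the unit-root null ($\gamma = 0$, i.e. non-stationarity) is rejected only when the statistic falls below the tau1 critical values (for example, -1.95 at the 5% level).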
# ADF for Adjusted_Value
adf_adjusted <- ur.df(merged_df$Adjusted_Value, type = "none", selectlags = "AIC")
# ADF for Close
adf_close <- ur.df(merged_df$Close, type = "none", selectlags = "AIC")
summary(adf_adjusted)
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression none
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 - 1 + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.87825 -0.22500 -0.00856 0.20905 1.90080
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## z.lag.1 0.0001144 0.0002141 0.534 0.594
## z.diff.lag 0.4795319 0.0519816 9.225 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.366 on 285 degrees of freedom
## Multiple R-squared: 0.2324, Adjusted R-squared: 0.227
## F-statistic: 43.15 on 2 and 285 DF, p-value: < 2.2e-16
##
##
## Value of test-statistic is: 0.5342
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau1 -2.58 -1.95 -1.62
summary(adf_close)
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression none
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 - 1 + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -60.870 -6.631 1.063 8.798 70.010
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## z.lag.1 0.001875 0.002286 0.820 0.413
## z.diff.lag -0.036558 0.059438 -0.615 0.539
##
## Residual standard error: 17.24 on 285 degrees of freedom
## Multiple R-squared: 0.003392, Adjusted R-squared: -0.003602
## F-statistic: 0.485 on 2 and 285 DF, p-value: 0.6162
##
##
## Value of test-statistic is: 0.8202
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau1 -2.58 -1.95 -1.62
With test statistics of 0.5342 for Adjusted_Value and 0.8202 for Close, both well above the 5% critical value of -1.95, we cannot reject the unit-root null, so both series are non-stationary. We will difference them to try to make them stationary.
# Difference Adjusted_Value and Close; drop the first row, which has no defined difference
diff_adjusted_value <- diff(merged_df$Adjusted_Value)
diff_close <- diff(merged_df$Close)
merged_df <- merged_df[-1, ]
merged_df$diff_adjusted_value <- diff_adjusted_value
merged_df$diff_close <- diff_close
# New ADFs for Adjusted and Closed after differencing
adf_adjusted <- ur.df(merged_df$diff_adjusted_value, type = "none", selectlags = "AIC")
adf_close <- ur.df(merged_df$diff_close, type = "none", selectlags = "AIC")
summary(adf_adjusted)
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression none
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 - 1 + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.78027 -0.21058 -0.00552 0.23367 1.86962
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## z.lag.1 -0.62177 0.05922 -10.500 < 2e-16 ***
## z.diff.lag 0.19920 0.05816 3.425 0.000705 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3594 on 284 degrees of freedom
## Multiple R-squared: 0.2886, Adjusted R-squared: 0.2836
## F-statistic: 57.61 on 2 and 284 DF, p-value: < 2.2e-16
##
##
## Value of test-statistic is: -10.4998
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau1 -2.58 -1.95 -1.62
summary(adf_close)
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression none
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 - 1 + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -58.307 -5.811 1.966 10.084 70.565
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## z.lag.1 -1.17044 0.08527 -13.726 <2e-16 ***
## z.diff.lag 0.13294 0.05940 2.238 0.026 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.14 on 284 degrees of freedom
## Multiple R-squared: 0.524, Adjusted R-squared: 0.5207
## F-statistic: 156.3 on 2 and 284 DF, p-value: < 2.2e-16
##
##
## Value of test-statistic is: -13.7257
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau1 -2.58 -1.95 -1.62
After differencing, the test statistics are -10.50 for Adjusted_Value and -13.73 for Close, both far below the 1% critical value of -2.58, so the differenced series are stationary.
Our goal is to predict future adjusted food prices as accurately as possible. We will create four forecasting models and evaluate them at the end to see which is most suitable for our purpose.
The models we are exploring are ARIMA, ETS, Random Forest, and VAR.
First, we will use the differenced Adjusted_Value series to create an ARIMA model with auto.arima(), which searches over candidate orders for the best fit. Then we will plot 12 months of predictions, summarise the model, and compare it with the others later.
# Fit ARIMA differenced Adjusted_Value and forecast the next year
arima_model <- auto.arima(diff_adjusted_value)
forecast_arima <- forecast(arima_model, h = 12)
plot(forecast_arima)
summary(arima_model)
## Series: diff_adjusted_value
## ARIMA(2,0,2) with zero mean
##
## Coefficients:
## ar1 ar2 ma1 ma2
## 1.2162 -0.3735 -0.6676 -0.1956
## s.e. 0.1099 0.1088 0.1182 0.1226
##
## sigma^2 = 0.1259: log likelihood = -108.52
## AIC=227.04 AICc=227.25 BIC=245.35
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.02655245 0.3523919 0.2612083 -10.50008 333.6203 0.8119732
## ACF1
## Training set -0.007213114
Then we will use the same differenced Adjusted_Values to create, plot and summarise an ETS model.
# Fit ETS differenced Adjusted_Value and forecast the next year
ets_model <- ets(diff_adjusted_value)
forecast_ets <- forecast(ets_model, h = 12)
plot(forecast_ets)
summary(ets_model)
## ETS(A,N,N)
##
## Call:
## ets(y = diff_adjusted_value)
##
## Smoothing parameters:
## alpha = 0.7212
##
## Initial states:
## l = -0.1318
##
## sigma: 0.4186
##
## AIC AICc BIC
## 1133.379 1133.463 1144.368
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.000504649 0.417185 0.3173277 59.49643 530.532 0.9864217
## ACF1
## Training set 0.08628949
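For reference, the selected ETS(A,N,N) model is simple exponential smoothing; in standard textbook form (not produced by the code), its level update and h-step forecast are

$$\ell_t = \alpha y_t + (1-\alpha)\,\ell_{t-1}, \qquad \hat{y}_{t+h\mid t} = \ell_t$$

so with the estimated $\alpha \approx 0.72$ above, the forecasts of the differenced series are flat at the last smoothed level.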
Next, we will build a Random Forest model with the US stock index Close data as the input.
The data does not need to be stationary for a Random Forest model, so we use the Adjusted_Value and Close columns from merged_df without differencing them. We are only using historical stock data as a predictor.
# Split into training and testing
set.seed(4617)
samp <- sample(1:nrow(merged_df), 0.8 * nrow(merged_df))
training <- merged_df[samp, ]
testing <- merged_df[-samp, ]
# Train
rf_model <- randomForest(Adjusted_Value ~ Close, data = training)
print(rf_model)
##
## Call:
## randomForest(formula = Adjusted_Value ~ Close, data = training)
## Type of random forest: regression
## Number of trees: 500
## No. of variables tried at each split: 1
##
## Mean of squared residuals: 2.669523
## % Var explained: 55.3
# Predict
predictions <- predict(rf_model, newdata = testing)
# Summary Calcs
rmse <- rmse(testing$Adjusted_Value, predictions)
mae <- mae(testing$Adjusted_Value, predictions)
mape <- mape(testing$Adjusted_Value, predictions)
cat("\nRMSE:", rmse, "\n")
##
## RMSE: 1.7484
cat("MAE:", mae, "\n")
## MAE: 1.387151
cat("MAPE:", mape * 100)
## MAPE: 1.370739
# Plot
ggplot() +
geom_point(aes(x = testing$Adjusted_Value, y = predictions), color = "blue") +
geom_abline(slope = 1, intercept = 0, color = "red") +
labs(x = "Actuals", y = "Predicted", title = "Actuals vs Predicted")
Finally, we will create a VAR model using the stock Close and the food Adjusted_Value series as inputs and compare the results with the previous models. Once we have gathered the summary statistics, we can compare the four models and decide which is most suitable for our purpose.
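For reference, with p = 1 the fitted system has the standard VAR(1) form (standard notation, not produced by the code):

$$\begin{bmatrix}\text{Adjusted\_Value}_t \\ \text{Close}_t\end{bmatrix} = c + A_1 \begin{bmatrix}\text{Adjusted\_Value}_{t-1} \\ \text{Close}_{t-1}\end{bmatrix} + \varepsilon_t$$

Each variable is regressed on the first lag of both variables plus a constant, which corresponds to the Adjusted_Value.l1, Close.l1, and const terms in the summary output below.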
# Create time series and months to predict
merge_df_ts <- merged_df[, c("Adjusted_Value", "Close")]
merge_df_ts <- ts(merge_df_ts, frequency = 12, start = c(2001, 1)) # monthly data, so frequency = 12
# Fit
var_model <- VAR(merge_df_ts, p = 1)
# Predict
months <- 12
forecast_data <- predict(var_model, n.ahead = months)
# Pull out actuals and predicted
actuals <- as.data.frame(merge_df_ts)
actuals$Date <- seq(from = as.Date("2001-01-01"), by = "month", length.out = nrow(actuals))
predicted <- as.data.frame(forecast_data$fcst$Adjusted_Value[, 1])
predicted$Date <- seq(from = max(actuals$Date) + 1, by = "month", length.out = months)
# Combine actuals and predicted
actuals$Type <- "Actual"
predicted$Type <- "Predicted"
colnames(predicted)[1] <- "Adjusted_Value"
plotdf <- rbind(actuals[, c("Date", "Adjusted_Value", "Type")],
predicted[, c("Date", "Adjusted_Value", "Type")])
# Plot
ggplot(plotdf, aes(x = Date, y = Adjusted_Value, color = Type)) +
geom_line(size = 1) +
labs(x = "Date", y = "Adjusted Value", title = "Actuals vs Predicted")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Get RMSE, MAE, and MAPE
# Align data: compare the 12 forecasted values against the last 12 observed values
testing <- tail(actuals, months)
predictions <- predicted$Adjusted_Value
# Summary Calcs
summary(var_model)
##
## VAR Estimation Results:
## =========================
## Endogenous variables: Adjusted_Value, Close
## Deterministic variables: const
## Sample size: 287
## Log Likelihood: -1373.57
## Roots of the characteristic polynomial:
## 0.995 0.9519
## Call:
## VAR(y = merge_df_ts, p = 1)
##
##
## Estimation results for equation Adjusted_Value:
## ===============================================
## Adjusted_Value = Adjusted_Value.l1 + Close.l1 + const
##
## Estimate Std. Error t value Pr(>|t|)
## Adjusted_Value.l1 0.9657373 0.0164046 58.870 <2e-16 ***
## Close.l1 0.0003561 0.0002282 1.561 0.120
## const 3.3402859 1.5851243 2.107 0.036 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 0.4146 on 284 degrees of freedom
## Multiple R-Squared: 0.9709, Adjusted R-squared: 0.9707
## F-statistic: 4744 on 2 and 284 DF, p-value: < 2.2e-16
##
##
## Estimation results for equation Close:
## ======================================
## Close = Adjusted_Value.l1 + Close.l1 + const
##
## Estimate Std. Error t value Pr(>|t|)
## Adjusted_Value.l1 1.138e+00 6.778e-01 1.679 0.0943 .
## Close.l1 9.812e-01 9.428e-03 104.074 <2e-16 ***
## const -1.059e+02 6.549e+01 -1.618 0.1068
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 17.13 on 284 degrees of freedom
## Multiple R-Squared: 0.9904, Adjusted R-squared: 0.9903
## F-statistic: 1.459e+04 on 2 and 284 DF, p-value: < 2.2e-16
##
##
##
## Covariance matrix of residuals:
## Adjusted_Value Close
## Adjusted_Value 0.1719 -0.4012
## Close -0.4012 293.3627
##
## Correlation matrix of residuals:
## Adjusted_Value Close
## Adjusted_Value 1.00000 -0.05651
## Close -0.05651 1.00000
rmse_value <- rmse(testing$Adjusted_Value, predictions)
mae_value <- mae(testing$Adjusted_Value, predictions)
mape_value <- mape(testing$Adjusted_Value, predictions)
cat("\nRMSE:", rmse_value, "\n")
##
## RMSE: 0.3725
cat("MAE:", mae_value, "\n")
## MAE: 0.3299122
cat("MAPE:", mape_value * 100)
## MAPE: 0.3161966
Summary of Models:
ARIMA:
- RMSE: 0.3524
- MAE: 0.2612
- MAPE: 333.62

The MAPE is huge (333.62), but this is largely an artifact of computing percentage errors on differenced values that sit near zero. The RMSE and MAE are the lowest of the four models.

ETS:
- RMSE: 0.417
- MAE: 0.317
- MAPE: 530.53

The MAPE is even worse than ARIMA at 530.53 (inflated for the same reason), and the RMSE and MAE are also higher than ARIMA, so ETS is not the best choice.

Random Forest:
- RMSE: 1.7484
- MAE: 1.3872
- MAPE: 1.37

The MAPE is far lower than ARIMA or ETS at 1.37, partly because it is computed on price levels rather than differences, but the RMSE and MAE are the highest of the four. The model explains 55.3% of the variance, so there is plenty of room for improvement.

VAR:
- RMSE: 0.3725
- MAE: 0.3299
- MAPE: 0.3162

The RMSE and MAE are better than Random Forest's and comparable to ARIMA and ETS (keeping in mind that the VAR is evaluated on levels while ARIMA and ETS report training errors on differences). The MAPE is the lowest of the four, and the R-squared for the Adjusted_Value equation is very high at about 97%.
At this point VAR appears to be the best option: it is straightforward to implement in an application and has solid summary metrics compared with the others. Random Forest is a decent option, and we may be able to improve it by using more predictor variables or tuning it (see the sketch below); however, to forecast into the future we would first need a model that accurately predicts the future stock data to feed in as inputs. ETS was the weakest performer, and ARIMA had the best RMSE and MAE but, as noted, a misleadingly bad MAPE.
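As a hedged sketch of that possible improvement (not run above; the lagged features, ntree, and mtry values below are hypothetical choices, and the rf_data and rf_tuned names are illustrative only):
#sketch: add lagged predictors and basic tuning to the Random Forest
rf_data <- merged_df |>
as_tibble() |>
mutate(Close_lag1 = dplyr::lag(Close, 1),
Close_lag3 = dplyr::lag(Close, 3),
Adj_lag1 = dplyr::lag(Adjusted_Value, 1)) |>
tidyr::drop_na()
set.seed(4617)
rf_tuned <- randomForest(Adjusted_Value ~ Close + Close_lag1 + Close_lag3 + Adj_lag1,
data = rf_data, ntree = 1000, mtry = 2)
print(rf_tuned)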
Jaimie - For the food prices dataset: Data import, TS decomposition, TS visualizations, and description of TS
Morgan - For the stock index dataset: Data import, TS decomposition, TS visualizations, and description of TS
Dustin - Combining two datasets, creating the TS models, creating the TS predictions, and evaluating the models.